\documentclass[english,t]{beamer}
%\documentclass[handout,english]{beamer}

\mode<presentation>
{
  \usetheme{Copenhagen}
  % oder ...
  
%  \setbeamercovered{transparent}
  % oder auch nicht
}

%\usepackage[pdftex]{graphicx}
\graphicspath{{/home/ave/doc/images/}{}{../teranaloppu/}{../metodi/}{../slides/Hartikainen/}{../gphealth/}{../2008_09_RSS2008/}{../gphealth/}{../jyvaskyla2009/}{../nbbc2009/}{../gphealth/hippics/}{../euroheis2010/}{../pubgensens2011/}{../reykjavik2013/}{../liverpool2013/}{../../gpstuff/doc/}{./images/}{../aalto_stochastic/}{figs/}{../Stancon2018Helsinki/figs/}{../../paper/cvapprox/}{../gppa2017/}{../valencia2017/}{../../paper/combine_predictive_distribution/tex/}{../venice2018/figs/}{./figs/}}
\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
\usepackage{times}
\usepackage{amsmath,amsfonts,amssymb}
%\usepackage{euscript}
\usepackage{afterpage}
%\usepackage{picinpar}
%\usepackage{array,longtable}
\usepackage{url}
\urlstyle{same}
%\usepackage{eufrak}
\usepackage{amsbsy}
\usepackage{eucal}
\usepackage{rotating}
\usepackage{bm}
\usepackage{pdfpages}
\usepackage{algorithm}
\usepackage[noend]{algpseudocode}
\usepackage{booktabs}
\usepackage{listings}
\usepackage{lstbayes}
\usepackage{microtype}

\usepackage{natbib}
\bibliographystyle{apalike}

\hypersetup{%
  bookmarksopen=true,
  bookmarksnumbered=true,
  pdftitle={Stan},
  pdfsubject={Bayesian data analysis},
  pdfauthor={Aki Vehtari},
  pdfkeywords={},
  pdfstartview={FitH -32768},
  colorlinks=true,
  linkcolor=navyblue,
  citecolor=navyblue,
  filecolor=navyblue,
  urlcolor=navyblue
}

%%%%%%%%%%%%%%%%%%% for tikz figures %%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{ifthen}
\usepackage{tikz,pgfplots}
\usetikzlibrary{matrix}
\usetikzlibrary{calc}
\newlength{\figurewidth}
\newlength{\figureheight}


\def\figpdfdir{./fig/} % directory for pdf-figures
\def\figtikzdir{./tikz/} % directory for tikz-figures 

% this is replacement for the \input command used in the figure-environment which
% takes into account whether pdf is forced
\newcommand{\minput}[2][]{
\ifthenelse{\equal{#1}{pdf}}
	{ \includegraphics{\figpdfdir #2} }
	{ \tikzset{external/remake next} \tikzsetnextfilename{#2} \input{\figtikzdir #2} }
}

% for externalization
\usetikzlibrary{external}
\tikzexternalize[prefix=\figpdfdir] 
\tikzset{external/system call={lualatex
	\tikzexternalcheckshellescape -halt-on-error -interaction=batchmode
	-jobname "\image" "\texsource"}}
    
%%%%%%%%%%%%%%%%%%% for hiding figures %%%%%%%%%%%%%%%%%%%%%%%%%%
\usepackage{color}
\newcommand{\hide}[5][white]{
	% usage: \hhide[color]{vspace,hspace,height,width}
	% note: all measures are relative units measured in \textwidth
	%\begin{minipage}{0.99\textwidth}
	\vspace{#2\textwidth}
	\hspace{#3\textwidth}
	\textcolor{#1}{  \rule{#5\textwidth}{#4\textwidth}  }
	% \end{minipage}
      }

\DeclareMathOperator{\Kfu}{\mathbf{K}_{f,u}}
\DeclareMathOperator{\Kuf}{\mathbf{K}_{u,f}}
\DeclareMathOperator{\Kff}{\mathbf{K}_{f,f}}
\DeclareMathOperator{\iKff}{\mathbf{K}_{f,f}^{-1}}
\DeclareMathOperator{\Kfa}{\mathbf{K}_{f,\tilde{f}}}
\DeclareMathOperator{\Kaf}{\mathbf{K}_{\tilde{f},f}}
\DeclareMathOperator{\Kaa}{\mathbf{K}_{\tilde{f},\tilde{f}}}
\DeclareMathOperator{\Kuu}{\mathbf{K}_{u,u}}
\DeclareMathOperator{\iKuu}{\mathbf{K}_{u,u}^{-1}}
\DeclareMathOperator{\Kau}{\mathbf{K}_{\tilde{f},u}}
\DeclareMathOperator{\Kua}{\mathbf{K}_{u,\tilde{f}}}
\DeclareMathOperator{\Qff}{\mathbf{Q}_{f,f}}
\DeclareMathOperator{\Qaa}{\mathbf{Q}_{\tilde{f},\tilde{f}}}
\DeclareMathOperator{\Qfa}{\mathbf{Q}_{f,\tilde{f}}}
\DeclareMathOperator{\Qaf}{\mathbf{Q}_{\tilde{f},f}}
\DeclareMathOperator{\x}{\mathbf{x}}
\DeclareMathOperator{\f}{\mathbf{f}}
\DeclareMathOperator{\y}{\mathbf{y}}
\DeclareMathOperator{\h}{\mathbf{h}}
\DeclareMathOperator{\uu}{\mathbf{u}}
\DeclareMathOperator{\LL}{\mathbf{\Lambda}}
\DeclareMathOperator{\bb}{\mathbf{b}}
\DeclareMathOperator{\E}{\mathrm{E}}
\def\WAIC{\mathrm{WAIC}}

\newcommand{\kin}{k^{\rm in}}
\newcommand{\kout}{k^{\rm out}}
\newcommand{\gi}{{R_0}}
\newcommand{\eff}{{E_{\rm max}}}
\newcommand{\HN}{{\rm N^+}}
\newcommand{\lN}{{\rm LN}}
\newcommand{\Rss}{R^{\rm ss}}
\newcommand{\invlogit}{\mbox{logit}^{-1}}

% \DeclareMathOperator{\Poisson}{Poisson}
\DeclareMathOperator{\Chi}{Chi}
\DeclareMathOperator{\GP}{\mathcal{GP}}
%\DeclareMathOperator{\N}{N}
\DeclareMathOperator{\KL}{KL}

\DeclareMathOperator*{\argmax}{arg\,max}
\DeclareMathOperator*{\argmin}{arg\,min}
\newcommand{\mb}{\mathbf}
\newcommand{\pkg}[1]{{\fontseries{b}\selectfont #1}}
\newcommand{\proglang}{}
\newcommand{\email}[1]{\href{mailto:#1}{\normalfont\texttt{#1}}}
\newcommand{\doi}[1]{\href{http://dx.doi.org/#1}{\normalfont\texttt{doi:#1}}}
\newcommand{\code}[1]{{\normalfont\texttt{#1}}}

% \DeclareMathOperator{\E}{E}
% \DeclareMathOperator{\VAR}{Var}
% \DeclareMathOperator{\COV}{Cov}
% \DeclareMathOperator{\Prob}{P}
% \DeclareMathOperator{\E}{E}
\DeclareMathOperator{\Var}{Var}
\DeclareMathOperator{\var}{var}
\DeclareMathOperator{\cov}{cov}
\DeclareMathOperator{\logistic}{logistic}
\DeclareMathOperator{\softmax}{softmax}
\DeclareMathOperator{\Multinomial}{Multinomial}
\DeclareMathOperator{\Sd}{Sd}
\DeclareMathOperator{\sd}{sd}
\DeclareMathOperator{\Bin}{Bin}
\DeclareMathOperator{\Poisson}{Poisson}
\DeclareMathOperator{\Beta}{Beta}
\DeclareMathOperator{\logit}{logit}
\DeclareMathOperator{\N}{N}
\DeclareMathOperator{\U}{U}
\DeclareMathOperator{\BF}{BF}
%\DeclareMathOperator{\Pr}{Pr}
\def\euro{{\footnotesize \EUR\, }}
\DeclareMathOperator{\rep}{\mathrm{rep}}

\definecolor{set11}{HTML}{E41A1C}
\definecolor{set12}{HTML}{377EB8}
\definecolor{greenish}{rgb}{0.1333,0.8666,0.1333}
\definecolor{forestgreen}{rgb}{0.1333,0.5451,0.1333}
\definecolor{hutblue}{rgb}{0,0.2549,0.6784}
\definecolor{midnightblue}{rgb}{0.0977,0.0977,0.4375}
\definecolor{navyblue}{rgb}{0,0,0.5}
\definecolor{hutsilver}{rgb}{0.4863,0.4784,0.4784}
\definecolor{lightgray}{rgb}{0.95,0.95,0.95}
\definecolor{section}{rgb}{0,0.2549,0.6784}
\definecolor{list1}{rgb}{0,0.2549,0.6784}
\renewcommand{\emph}[1]{\textcolor{navyblue}{#1}}

%\graphicspath{./pics}

\pdfinfo{
  /Title      (Bayesian data analysis ch7)
  /Author     (Aki Vehtari) %
  /Keywords   (Bayesian probability theory, Bayesian inference, Bayesian data analysis)
}

\parindent=0pt
\parskip=8pt
\tolerance=9000
\abovedisplayshortskip=0pt

%\renewcommand{\itemsep}{0pt}
% Lists
\newenvironment{list1}{
   \begin{list}{$\color{list1}\bullet$}{\itemsep=6pt}}{
  \end{list}}
\newenvironment{list1s}{
  \begin{list}{$\includegraphics[width=5pt]{logo.eps}$}{\itemsep=6pt}}{
  \end{list}}
\newenvironment{list2}{
  \begin{list}{-}{\baselineskip=12pt\itemsep=2pt}}{
  \end{list}}
\newenvironment{list3}{
  \begin{list}{$\cdot$}{\baselineskip=15pt}}{
  \end{list}}

\setbeamertemplate{navigation symbols}{}
\setbeamertemplate{headline}[default]{}
\setbeamertemplate{headline}[text line]{\insertsection}
\setbeamertemplate{footline}[frame number]


\title[]{Bayesian data analysis}
\subtitle{}

\author{Aki Vehtari}

\institute[Aalto]{}
 
\date[]{}

%\beamerdefaultoverlayspecification{<+->}

\begin{document}

\begin{frame}{}

  {\Large\color{navyblue} Rich model vs feature selection?}

  \begin{itemize}
  \item If we care only about the predictive performance
    \begin{itemize}
    \item Include all available prior information
    \item Integrate over all uncertainties
    \item No need for feature selection
    \end{itemize}
  \item<2-> Variable selection can be useful if
    \begin{itemize}
    \item need to reduce measurement or computation cost in the future
    \item improve explainability
    \end{itemize}
  \item<3-> Two options for variable selection
    \begin{itemize}
    \item Find a minimal subset of features that yield a good
      predictive model
    \item Identify all features that have predictive information
    \end{itemize}
  \end{itemize}
  
\end{frame}

\begin{frame}{}

  {\Large\color{navyblue} Note on the terminology}
  
\begin{itemize}
\item Two different problems
  \begin{itemize}
  \item Find a \emph{minimal} subset of features $x_j$ that yield a good predictive model for $y$
  \item Identify \emph{all} features $x_j$ that are statistically related to $y$
  \end{itemize}\pause
  I will focus here to the first case
\end{itemize}

\end{frame}

\begin{frame}{}

  {\Large\color{navyblue} Why shrinkage priors alone do not solve the
    variable selection problem}

  \begin{itemize}
  \item A common strategy:
    \begin{itemize}
    \item Fit model with a shrinkage prior
    \item Select variables based on marginal posteriors (of the
      regression coefficients)
    \end{itemize}
  \item<2-> Problems
    \begin{itemize}
    \item Marginal posteriors are difficult with correlated features
    \item How to do post-selection inference correctly?
    \end{itemize}
  \end{itemize}
\end{frame}

\begin{frame}{}

  {\Large\color{navyblue} Example}

Consider data
\begin{equation*}
  \color{gray}
\begin{alignedat}{2}
  \only<1>{\color{black}}f & \only<1>{\color{black}} \sim \N(0,1), &&  \\
  \only<2>{\color{black}}y \mid f &\only<2>{\color{black}}\sim \N(f, 1) && \\
  \only<3>{\color{black}}x_j \mid f &\only<3>{\color{black}}\sim \N(\sqrt{\rho}f,\, 1-\rho), \qquad &&\only<3>{\color{black}} j = 1,\dots,25 \,, \\
  \only<4>{\color{black}}x_j \mid f &\only<4>{\color{black}}\sim \N(0, 1), && \only<4>{\color{black}}j = 26,\dots, 50 \,.
\end{alignedat}
\end{equation*}

\begin{itemize}
\item<2-> $y$ are noisy observations about latent $f$ \pause
\item<3-> First $p_\text{rel}=25$ features are correlated with $\rho$ and predictive about $y$ \pause
\item<4-> Remaining $25$ features are irrelevant random noise
\end{itemize}
\uncover<5>{Generate one data set $\{ x^{(i)}, y^{(i)}\}^n_{i=1}$ with $n=50$ and $\rho=0.8$ and assess the feature relevances}

\end{frame}

\begin{frame}{}

  {\Large\color{navyblue} Example}

  \makebox[12.1cm][t]{
    \hspace{-0.9cm}
  \begin{minipage}[b][4cm][t]{12.1cm}
  \includegraphics[width=4cm]{toy_marginal1.pdf}
  \uncover<2->{\includegraphics[width=4cm]{toy_marginal2.pdf}}
  \uncover<3->{\includegraphics[width=4cm]{toy_marginal3.pdf}}\\
\end{minipage}
}\\
\uncover<1->{
	A) Gaussian prior, posterior median with 50\% and 90\% intervals\\ 
	}
	\uncover<2->{
	B) Horseshoe prior, same things\\ 
	}
	\uncover<3->{
	C) Spike-and-slab prior, posterior inclusion probabilities\\ 
      }
      ~\\
      \uncover<4>{Half of the features relevant, but all marginals
        substantially overlapping with zero}
\end{frame}

\begin{frame}{}

  {\Large\color{navyblue} What happens?}

  \makebox[12.1cm][t]{
    \hspace{-0.3cm}
    \begin{minipage}[b][8.1cm][t]{12.1cm}
      \makebox[0cm][t]{\hspace{-0.4cm}\rotatebox{90}{\hspace{1.4cm}Gaussian}}
      \includegraphics[width=3.7cm]{toy_scatter1.pdf}
      \uncover<2->{\includegraphics[width=3.7cm]{toy_scatter3.pdf}}
      \uncover<3->{\includegraphics[width=3.7cm]{toy_scatter5.pdf}}\\
      \makebox[0cm][t]{\hspace{-0.4cm}\rotatebox{90}{\hspace{1.4cm}Horseshoe}}
      \includegraphics[width=3.7cm]{toy_scatter2.pdf}
      \uncover<2->{\includegraphics[width=3.7cm]{toy_scatter4.pdf}}
      \uncover<3->{\includegraphics[width=3.7cm]{toy_scatter6.pdf}}\\
      \makebox[3.7cm]{$\quad p_\text{rel}=2$}
      \uncover<2->{\makebox[3.7cm]{$\quad p_\text{rel}=5$}}
      \uncover<3->{\makebox[3.7cm]{$\quad p_\text{rel}=25$}}
\end{minipage}
}
\end{frame}

\begin{frame}{}

  {\Large\color{navyblue} Focus on predictive performance}

  \begin{itemize}
  \item Two stage approach
    \begin{itemize}
    \item Construct a best predictive model you can\\ $\Rightarrow$ {\it reference model}
    \item Variable selection and post-selection inference\\ $\Rightarrow$ {\it projection}
    \end{itemize}
  \item<2-> Instead of looking at the marginals, find the minimal
    subset of features which have (almost) the same predictive
    performance as the reference model
  \end{itemize}
  
\end{frame}

\begin{frame}{}

  {\Large\color{navyblue} Reference model improves variable selection}

  Same data generating mechanism, but\\ $n=30$, $p=500$, $p_\text{rel}=150$, $\rho=0.5$.

  \includegraphics[width=5.5cm]{toy_corr1.pdf}\\
  \vspace{-0.2cm}
  \hspace{0.5cm} {\color{set12} irrelevant $x_j$}, {\color{set11} relevant $x_j$}\\
  \vspace{0.2cm}
  \hspace{0.5cm} Sample correlation with $y$\\
  
\end{frame}

\begin{frame}{}

  {\Large\color{navyblue} Reference model improves variable selection}

  \includegraphics[width=5.5cm]{toy_corr2.pdf}
  \uncover<2->{\includegraphics[width=5.5cm]{toy_corr3.pdf}}\\
  \vspace{-0.2cm}
  \hspace{0.5cm} {\color{set12} irrelevant $x_j$}, {\color{set11} relevant $x_j$}\\
  \vspace{0.2cm}
  A) Sample correlation with $y$ vs. sample correlation with $f$\\
  \uncover<2->{B) Sample correlation with $y$ vs. sample correlation with $f_*$\\
  $f_* = $ linear regression fit with 3 supervised principal components}
  
\end{frame}

% \begin{frame}{}

%   {\Large\color{navyblue} (Iterative) Supervised Principal Components}

%   \begin{itemize}
%   \item Dimension reduction for high dimensional small data with
%     highly correlating features
%     \begin{itemize}
%     \item dimension reduction helps to speed up later computation
%       without discarding much information
%     \item supervised means that features correlating with the target
%       are favored in construcing the principal components
%     \end{itemize}
%     {\small
%     \item     Piironen and Vehtari (2018). Iterative supervised principal components. 21st AISTATS, PMLR 84:106-114. \href{http://proceedings.mlr.press/v84/piironen18a.html}{Online}.
%     }
%   \end{itemize}
  
% \end{frame}

\begin{frame}{}

  {\Large\color{navyblue} Predictive projection, idea}
  
  \begin{itemize}
  \item Model simplification technique
  \item<2-> Replace full posterior $p(\theta \mid D)$ with
    some constrained $q(\theta)$ so that the \emph{predictive
      distribution} changes as little as possible
  \item<3-> Example constraints
    \begin{itemize}
    \item $q(\theta)$ can have only point mass at some $\theta_0$ \\
      $\Rightarrow$ ``Optimal point estimates''
    \item<4-> Some features must have exactly zero regression coefficient \\
      $\Rightarrow$ ``Which features can be discarded''
    \end{itemize}
    \vspace{1\baselineskip}
  \item<5-> The decision theoretic idea of conditioning the smaller
    model inference on the full model can be tracked to Lindley (1968)
    \begin{itemize}
    \item draw by draw projection introduced by Goutis \& Robert
      (1998), and Dupuis \& Robert (2003)
    \item see also many related references in a review by
      \href{http://dx.doi.org/10.1214/12-SS102}{Vehtari \& Ojanen
        (2012)}
    \end{itemize}
\end{itemize}

\end{frame}

\begin{frame}{}

  {\Large\color{navyblue} Logistic regression with two features}

  \vspace{\baselineskip}
  
  \begin{overlayarea}{\textwidth}{0.7\textwidth}
    \begin{minipage}{0.99\textwidth}
      \begin{center}
        \only<1>{ \minput{proj_posterior} }%
        \only<2>{ \minput{proj_b1b2_single} }%
        \only<3>{ \minput{proj_b2_single} }%
        \only<4>{ \minput{proj_b1_single} }%
        \only<5>{ \minput{proj_b2_dbd} }%
        \only<6>{ \minput{proj_b1_dbd} }%
      \end{center}
    \end{minipage}
    \begin{minipage}{0.99\textwidth}
      \only<1>{Full posterior for $\beta_1$ and $\beta_2$ and contours of predicted class probability}
      \only<2>{Projected point estimates for $\beta_1$ and $\beta_2$}
      \only<3>{Projected point estimates, constraint $\beta_1 = 0$}
      \only<4>{Projected point estimates, constraint $\beta_2 = 0$}
      \only<5>{Draw-by-draw projection, constraint $\beta_1 = 0$}
      \only<6>{Draw-by-draw projection, constraint $\beta_2 = 0$}
    \end{minipage}
  \end{overlayarea}
\end{frame}

\begin{frame}

  {\Large\color{navyblue} Predictive projection}


  \begin{itemize}
  \item Replace full posterior $p(\theta \mid D)$ with some
    constrained $q(\theta)$ so that the \emph{predictive distribution}
    changes as little as possible
  \item<2-> As the full posterior $p(\theta \mid D)$ is projected to $q(\theta)$
    \begin{itemize}
    \item the prior is also projected and there is no need to define
      priors for submodels separately
    \item<3-> even if we constrain some coefficients to be $0$, the
      predictive inference is conditioned on the information related
      features contributed to the reference model
    \end{itemize}
  \end{itemize}

  
\end{frame}

\frame{

  {\Large\color{navyblue} Projective selection}

\begin{itemize}
	\item How to select a feature combination? \pause
	\item For a given model size, choose feature combination with minimal projective loss \pause
	\item Search heuristics, e.g. 
	\begin{itemize}
        \item Monte Carlo search
        \item Forward search
        \item $L_1$-penalization (as in Lasso)
	\end{itemize} \pause 
      \item Use cross-validation to select the appropriate model size
        \begin{itemize}
        \item need to cross-validate over the search paths
        \end{itemize}
      \end{itemize} 
}

\begin{frame}

  {\Large\color{navyblue} Selection induced bias in variable selection}

  \includegraphics[height=0.88\textheight]{simulated_variability.pdf}
   \vspace{-1.5\baselineskip}
   \mbox{{\hspace{8cm} \footnotesize \href{http://link.springer.com/article/10.1007/s11222-016-9649-y}{Piironen \& Vehtari (2017)}}}

\end{frame}

\begin{frame}

  {\Large\color{navyblue} Selection induced bias in variable selection}

  \includegraphics[height=0.88\textheight]{real_searchpath.pdf}
   \vspace{-2\baselineskip}
   \mbox{{\hspace{9.5cm} \parbox[t]{12cm}{\footnotesize \href{http://link.springer.com/article/10.1007/s11222-016-9649-y}{Piironen \&\\ Vehtari (2017)}}}}

\end{frame}

\begin{frame}
  
  {\Large\color{navyblue} Bodyfat: small $p$ example of projection predictive}
  
  Predict bodyfat percentage. The reference value is obtained by
  immersing person in water. $n=251$.

  \pause
  \vspace{-0.7\baselineskip}
  \includegraphics[width=7.7cm]{bodyfat_corr.pdf}

\end{frame}

\begin{frame}
  
  {\Large\color{navyblue} Bodyfat}

  Marginal posteriors of coefficients
  
  \includegraphics[width=11cm]{bodyfat_mcmc_areas.pdf}

\end{frame}

\begin{frame}
  
  {\Large\color{navyblue} Bodyfat}

  Bivariate marginal of weight and height
  
  \includegraphics[width=7.5cm]{bodyfat_mcmc_scatter.pdf}

\end{frame}

\begin{frame}
  
  {\Large\color{navyblue} Bodyfat}

  The predictive performance of the full and submodels
  
  \includegraphics[width=11cm]{bodyfat_varsel_plot.pdf}

\end{frame}


\begin{frame}
  
  {\Large\color{navyblue} Bodyfat}

  Marginals of projected posterior
  
  \includegraphics[width=11cm]{bodyfat_proj_mcmc_areas.pdf}

\end{frame}

\begin{frame}
  
  {\Large\color{navyblue} Bodyfat}

  Projected posterior is not just the conditional of joint
  
  \includegraphics[width=7.5cm]{bodyfat_proj_mcmc_scatter.pdf}

\end{frame}


% \begin{frame}
  
%   {\Large\color{navyblue} Bodyfat}

%   Projected posterior is different than posterior conditioned only on selected features

%   \vspace{-0.7\baselineskip}
%   \includegraphics[width=11cm]{bodyfat_compare_sigmas.pdf}

% \end{frame}

% Case study \url{avehtari.github.io/modelselection/bodyfat.html}
\begin{frame}
  
  {\Large\color{navyblue} Projection of Gaussian graphical models}\\

  \begin{itemize}
  \item Williams, Piironen, Vehtari, Rast (2018). Bayesian estimation of Gaussian graphical models with projection predictive selection. \href{https://arxiv.org/abs/1801.05725}{arXiv:1801.05725}\\
    \includegraphics[width=8.5cm]{ceu.pdf}\\
    {\footnotesize CEU genetic network. BGL: Bayesian glasso; GL: glasso;
      TIGER: tuning insensitive graph estimation and regression; BMA:
      Bayesian model averaging; MAP: Maximum a posteriori; Projection:
      projection predictive selection.}
  \end{itemize}
  
\end{frame}

\begin{frame}
  
  {\Large\color{navyblue} More results}\\

  {\footnotesize
  \begin{itemize}
  \item More results projpred vs. Lasso and elastic net:\\
    Piironen, Paasiniemi, Vehtari (2018). Projective Inference in
    High-dimensional Problems: Prediction and Feature Selection.
    \href{https://arxiv.org/abs/arXiv:1810.02406}{arXiv:1810.02406}
  \item More results projpred vs. marginal posterior probabilities:\\
    Piironen and Vehtari (2017). Comparison of Bayesian predictive
    methods for model selection. Statistics and Computing,
    27(3):711-735.
    \href{https://dx.doi.org/10.1007/s11222-016-9649-y}{doi:10.1007/s11222-016-9649-y}.
  \item projpred for Gaussian graphical models:\\
    Williams, Piironen, Vehtari, Rast (2018). Bayesian estimation of Gaussian graphical models with projection predictive selection. \href{https://arxiv.org/abs/1801.05725}{arXiv:1801.05725}
  \item More results for Bayes SPC:\\
    Piironen and Vehtari (2018). Iterative supervised principal components. 21st AISTATS, PMLR 84:106-114. \href{http://proceedings.mlr.press/v84/piironen18a.html}{Online}.
  \item Several case studies for small to moderate dimensional ($p=4 \ldots 100$) small data:\\
    Vehtari (2018). Model assessment, selection and inference after
    selection. \url{https://avehtari.github.io/modelselection/}
  \end{itemize}
  }
\end{frame}


\frame{

  {\Large\color{navyblue} Take-home messages (part 2)}

\begin{itemize}
\item Sparse priors do not automate variable selection
  \begin{itemize}
  \item Don't trust marginal posteriors
  \end{itemize}
\item<2-> Reference model + projection can improve feature selection
  \begin{itemize}
  \item Excellent tradeoff between accuracy and model complexity
  \item Useful also for identifying all the relevant features
  \end{itemize}
\item<3-> Well developed for GLMs, but can be used also with other model families
\item<4-> More details and results (+ some theoretical discussion) in the paper
  \begin{itemize}
  \item Piironen, Paasiniemi, Vehtari (2018). Projective Inference in
    High-dimensional Problems: Prediction and Feature
    Selection. \href{https://arxiv.org/abs/arXiv:1810.02406}{arXiv:1810.02406}
  \end{itemize}
\item<5-> R-package {\tt projpred} in CRAN and github
  \url{https://github.com/stan-dev/projpred}\\ (easy to use, e.g. with
  RStan, RStanARM, brms)
\end{itemize}
}

\begin{frame}{}

  {\Large\color{navyblue}  References}

  References and more at \url{avehtari.github.io/masterclass/} and
  \url{avehtari.github.io/modelselection//}
  \begin{list1}
  \item Model selection tutorial at StanCon 2018 Asilomar
    \begin{list2}
    \item more about projection predictive variable selection
    \end{list2}
  \item Regularized horseshoe talk at StanCon 2018 Asilomar
  \item Several case studies
  \item References with links to open access pdfs
  \end{list1}
  
\end{frame}

\end{document}

%%% Local Variables: 
%%% mode: latex
%%% TeX-master: t
%%% End:
